We start with a function just to make it easier to manually select different sets of complaints to view.
cbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#CC79A7", "#F0E442", "#0072B2", "#D55E00")
## Function to plot subset of complaints in a faceted plot
plot_complaint_volume <- function(data, vars, pal) {
plot_data <- data %>% filter(incident_complaint_reported_by_dispatch %in% vars) %>%
group_by(mo_yr, incident_complaint_reported_by_dispatch) %>%
count()
p <- ggplot(plot_data) +
geom_line(aes(x = mo_yr, y = n, color = incident_complaint_reported_by_dispatch), alpha = 1) +
geom_point(aes(x = mo_yr, y = n, color = incident_complaint_reported_by_dispatch), alpha = 0.7) +
geom_vline(xintercept = dmy("02-01-2020"), linetype = "dashed", alpha = 0.8) +
scale_color_manual(values = pal) +
facet_wrap(~incident_complaint_reported_by_dispatch, ncol = 1) +
labs(x = "Date", y = "Monthly Incidents", color = "Complaint") +
theme_minimal()
return(p)
}
For now, let's look at the top 3 complaints by frequency (breathing problems, sick person, and falls).
select_complaints <- function(n) {
## View top 3 complaints
top_complaints <- new_ems_data %>%
group_by(incident_complaint_reported_by_dispatch) %>%
summarize(count = n()) %>%
top_n(n, count)
return(top_complaints$incident_complaint_reported_by_dispatch)
}
complaints <- select_complaints(3)
## Plot
plot_complaint_volume(new_ems_data, vars = complaints, cbPalette) +
labs(title = "Trends in Top 3 Complaints") +
theme(plot.title = element_text(hjust = 0.5, size = 16),
legend.position = "none")
# ggsave(here("output", "temporal_top_complaints_volume.png"))
The top complaints differ from the top provider impressions. We were told that the dispatch complaint report may not be as reliable. May be interesting to explore whether there is systematic variation in these variables (do the impressions differ from the complaints for particular types of incidents? For different demographic characteristics? Spatially?)
## Extract most frequent impressions
top_impressions <- new_ems_data %>%
filter(!is.na(situation_provider_primary_impression_code_and_description)) %>%
group_by(situation_provider_primary_impression_code_and_description) %>%
count() %>%
ungroup() %>%
top_n(10) %>%
arrange(desc(n))
## Selecting by n
top_impressions <- top_impressions$situation_provider_primary_impression_code_and_description
## Pool incidents by month
plot_data <- new_ems_data %>%
filter(situation_provider_primary_impression_code_and_description %in% top_impressions) %>%
group_by(mo_yr, situation_provider_primary_impression_code_and_description) %>%
count() %>%
mutate(situation_provider_primary_impression_code_and_description = factor(situation_provider_primary_impression_code_and_description, levels = top_impressions))
## Plot temporal trends in volume
ggplot(plot_data) +
geom_line(aes(x = mo_yr, y = n, color = situation_provider_primary_impression_code_and_description), alpha = 1) +
geom_point(aes(x = mo_yr, y = n, color = situation_provider_primary_impression_code_and_description), alpha = 0.7) +
geom_vline(xintercept = dmy("01-02-2020"), linetype = "dashed", alpha = 0.8) +
#scale_color_manual(values = cbPalette) +
#facet_wrap(~situation_provider_primary_impression_code_and_description, ncol = 1) +
labs(x = "Date", y = "Monthly Incidents", color = "Complaint", title = "Call Volume for Top Provider Impressions\n") +
theme_minimal() +
#plot_theme #+
theme(legend.position = "none")
## Plot temporal trends in volume
ggplot(plot_data) +
geom_area(aes(x = mo_yr, y = n, fill = situation_provider_primary_impression_code_and_description), color = "black", alpha = 1, position = "fill") +
#geom_point(aes(x = mo_yr, y = n, color = situation_provider_primary_impression_code_and_description), alpha = 0.7) +
geom_vline(xintercept = dmy("01-02-2020"), linetype = "dashed", alpha = 0.8) +
#scale_color_manual(values = cbPalette) +
#facet_wrap(~situation_provider_primary_impression_code_and_description, ncol = 1) +
labs(x = "Date", y = "Monthly Incidents", color = "Complaint", title = "Call Volume for Top Provider Impressions\n") +
theme_minimal() +
#plot_theme #+
theme(legend.position = "none")
# ggsave(here("output", "top_impression_volume_prop.png"), width = 15, height = 12)
plot_data <- new_ems_data %>%
#filter(situation_provider_primary_impression_code_and_description %in% top_impressions) %>%
#mutate(situation_provider_primary_impression_code_and_description = factor(situation_provider_primary_impression_code_and_description,
#levels = top_impressions)) %>%
mutate(primary_impression_category = case_when(is.na(situation_provider_primary_impression_code_and_description) ~ "missing",
str_detect(situation_provider_primary_impression_code_and_description, "alcohol") ~ "abuse of alcohol",
str_detect(situation_provider_primary_impression_code_and_description, "abuse") ~ "abuse of substance",
str_detect(situation_provider_primary_impression_code_and_description, "allergic") ~ "allergic reaction",
str_detect(situation_provider_primary_impression_code_and_description, "ob") ~ "ob",
str_detect(situation_provider_primary_impression_code_and_description, "endocrine") ~ "endocrine",
str_detect(situation_provider_primary_impression_code_and_description, "gi") ~ "gi/gu",
str_detect(situation_provider_primary_impression_code_and_description, " - ") ~ str_extract(situation_provider_primary_impression_code_and_description, "[^-]+"),
!str_detect(situation_provider_primary_impression_code_and_description, " - ") ~ str_extract(situation_provider_primary_impression_code_and_description, "[^\\(]+")),
primary_impression_category = trimws(primary_impression_category)) %>%
group_by(mo_yr, primary_impression_category) %>%
count() %>%
filter(!str_detect(primary_impression_category, "\\|"))
ggplot(plot_data) +
#geom_area(aes(x = mo_yr, y = n, fill = primary_impression_category), alpha = 1, position = "fill") +
geom_line(aes(x = mo_yr, y = n, group = primary_impression_category, color = primary_impression_category), alpha = 0.8) +
geom_vline(xintercept = dmy("01-02-2020"), linetype = "dashed", alpha = 0.8) +
gghighlight(primary_impression_category %in% c("infectious", "respiratory"), use_direct_label = FALSE,
unhighlighted_params = list(alpha = 0.5)) +
#scale_fill_manual(values = cbPalette) +
#scale_color_manual(values = cbPalette) +
#facet_wrap(~situation_provider_primary_impression_code_and_description, ncol = 1) +
scale_colour_manual(values = c("infectious" = "#39a6d1", "respiratory" = "#c95d1b")) +
labs(x = "Date", y = "Monthly Incidents", color = "Impression Category", title = "Call Volume by Impression") +
#facet_grid(~primary_impression_category) +
theme_minimal() #+
## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Warning: Could not calculate the predicate for layer 2; ignored
#plot_theme
#ggsave(here("output", "impression_category_volume.png"), width = 10, height = 6)
Plotting without faceting quickly gets too busy, but converting to an interactive layout with plotly can help with this by allowing for highlighting. Here are the top 8 complaints plotted together for better comparison.
#vars <- complaints
top_complaints <- new_ems_data %>%
group_by(incident_complaint_reported_by_dispatch) %>%
summarize(n = n()) %>%
top_n(8, n)
vars <- top_complaints$incident_complaint_reported_by_dispatch
plot_data <- new_ems_data %>% filter(incident_complaint_reported_by_dispatch %in% vars) %>%
mutate(mo_yr = dmy(paste0("01-", mo, "-", yr))) %>%
group_by(mo_yr, incident_complaint_reported_by_dispatch) %>%
count()
# cbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#CC79A7", "#F0E442", "#0072B2", "#D55E00")
## Key to tell plotly what variable to highlight by
key <- highlight_key(plot_data, ~incident_complaint_reported_by_dispatch)
## Create ggplot
plt <- ggplot(key, aes(text = paste('Complaint', incident_complaint_reported_by_dispatch, sep = ": "))) +
geom_line(aes(x = mo_yr, y = n, color = incident_complaint_reported_by_dispatch), alpha = 1) +
#geom_point(aes(x = mo_yr, y = n), alpha = 0.7) +
geom_vline(xintercept = dmy("02-01-2020"), linetype = "dashed", alpha = 0.8) +
scale_color_brewer(palette = "Set2") +
labs(title = "Call Volume by Reported Complaint", x = "Date", y = "Incidents per month") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
## Convert to plotly and set highlight options
plt_highlight <- ggplotly(plt, tooltip = c("text")) %>%
highlight(on = "plotly_hover", off = "plotly_doubleclick") %>%
layout(showlegend = FALSE)
plt_highlight
## Helper function since we will probably have to clean the response time variable this way a lot
## Allows us to look at response times at the unit call sign level of analysis
clean_response_times <- function(data) {
## Split incidents with multiple units into multiple rows
cln_data <- data %>%
distinct() %>% ## Avoid double counting for calls with multiple patients
mutate(total_unit_response_time = str_split(total_unit_response_time, "\\|")) %>% ## Split
unnest(total_unit_response_time) %>%
mutate(total_unit_response_time = as.numeric(total_unit_response_time))
return(cln_data)
}
## Function to unlist multiple columns. Maybe Ellen has already put something togehter to do this more efficiently.
## Not even sure this is matching the correct units to the correct times so maybe just use different data would be the best way to go.
## Only works on cases where the numbers of list items in each of the columns are consistent with each other
unlist_data <- function(data) {
cln_data <- data %>%
distinct() %>%
mutate(total_unit_response_time = str_split(total_unit_response_time, "\\|"),
response_ems_unit_call_sign = str_split(response_ems_unit_call_sign, "\\|")) %>%
unnest(c(total_unit_response_time, response_ems_unit_call_sign))
return(cln_data)
}
Here we take a look at the average response time over time. Two obvious outliers emerge - one on April 7, 2017, and one on August 12, 2017. The latter is obviously the Unite the Right rally that took place in Charlottesville on that date. I'm not yet sure about the April 7 incident - we should ask contacts in Charlottesville if they know why this might be.
## This is going to be at the response unit level, so clean up duplicate individuals and split response times for separate units at single incident
## Outlier spikes on April 7 and August 12 of 2017. August 12 is obviously the Unite the Right rally. Not sure about April 7
new_ems_data %>%
clean_response_times() %>%
group_by(incident_date) %>%
mutate(avg_response = mean(total_unit_response_time, na.rm = TRUE)) %>%
ggplot() +
geom_line(aes(x = incident_date, y = avg_response))
It's curious that it happens on August 12, but that incident still looks like an outlier to me. These spikes are driven by two very long responses, and not because all the calls on that day were abnormally long. Both calls were about 24 hours so I thought it might have to do with an impoperly recorded date (one day ahead or somemthing), but the psap date time and incident date are consistent.
new_ems_data %>%
clean_response_times() %>%
filter(total_unit_response_time > 1000) %>%
select(response_incident_number, incident_date, total_unit_response_time, incident_psap_call_date_time)
## # A tibble: 2 x 4
## response_incident_n… incident_date total_unit_response… incident_psap_call_da…
## <chr> <date> <dbl> <dttm>
## 1 2017-00007421 2017-08-12 1441. 2017-08-12 17:14:36
## 2 2017-00003319 2017-04-07 1445. 2017-04-07 18:15:12
Here we have the same thing (with outliers removed) but averaged at the monthly level to see if any longer-term trends emerge. Looks like response times are trending upwards a bit.
## Same thing but grouped by month for clearer sense of trends
new_ems_data %>%
filter(total_unit_response_time < 1000) %>%
clean_response_times() %>%
group_by(mo_yr) %>%
mutate(avg_response = mean(total_unit_response_time, na.rm = TRUE)) %>%
ggplot() +
geom_line(aes(x = mo_yr, y = avg_response)) +
labs(title = "Average Total Response Times by Month", y = "Mean Response Time (min)", x = "Date") +
theme_minimal() +
plot_theme +
theme(legend.position = "none")
This is super tentative, but could be an interesting idea: after the pandemic begins, do we see the response times go down for more "concerning" dispatch reports? There's a bit of evidence that maybe response times for complaints of breathing problems and sick people go down while chest pain and falls increase. These are obviously just a tiny subset of complaints, but worth exploring further?
complaints <- select_complaints(4)
## Look at response times by complaint type - might have to just select minimum response time for multi-unit calls to avoid double counting
new_ems_data %>%
filter(total_unit_response_time < 1000) %>%
clean_response_times() %>%
group_by(response_incident_number) %>%
filter(!is.na(total_unit_response_time), incident_complaint_reported_by_dispatch %in% complaints) %>%
mutate(min_total_time = min(total_unit_response_time)) %>%
ungroup() %>%
group_by(mo_yr, incident_complaint_reported_by_dispatch) %>%
mutate(avg_response = mean(as.numeric(total_unit_response_time), na.rm = TRUE)) %>%
ggplot() +
geom_line(aes(x = mo_yr, y = avg_response, color = incident_complaint_reported_by_dispatch), alpha = 0.4) +
geom_smooth(aes(x = mo_yr, y = avg_response, color = incident_complaint_reported_by_dispatch, fill = incident_complaint_reported_by_dispatch), alpha=0.2, show.legend = FALSE) +
scale_color_manual(values = cbPalette) +
scale_fill_manual(values = cbPalette) +
labs(title = "Average Total Response Times by Month: Charlottesville", y = "Mean Response Time (min)", x = "Date", color = "Complaint") +
guides(color = guide_legend(override.aes = list(alpha = 1))) +
theme_minimal() #+
#plot_theme
# ggsave(here("output", "response_times_by_month_top_complaints.png"), height = 10, width = 15)
## Pool incidents by month
monthly_counts <- new_ems_data %>%
#filter(!is.na(patient_race_list), !str_detect(patient_race_list, ","), patient_race_list != "Not Applicable", patient_race_list != "Not Recorded") %>%
mutate(age_breaks = cut(as.numeric(patient_age), breaks = c(0, 20, 40, 60, 80, 100))) %>%
filter(!is.na(age_breaks), yr != "2016") %>%
group_by(mo_yr, age_breaks) %>%
count()
age_baselines <- new_ems_data %>%
mutate(age_breaks = cut(as.numeric(patient_age), breaks = c(0, 20, 40, 60, 80, 100))) %>%
group_by(age_breaks, mo_yr) %>%
filter(incident_date < "2020-03-01", yr != "2016") %>%
count() %>%
ungroup() %>%
group_by(age_breaks) %>%
summarize(mean = mean(n))
## `summarise()` ungrouping output (override with `.groups` argument)
plot_data <- full_join(monthly_counts, age_baselines) %>%
mutate(pct_of_baseline = (n / mean) * 100)
## Joining, by = "age_breaks"
## Plot temporal trends in volume
ggplot(plot_data) +
geom_line(aes(x = mo_yr, y = pct_of_baseline, color = age_breaks, group = age_breaks), alpha = 0.7) +
geom_point(aes(x = mo_yr, y = pct_of_baseline, color = age_breaks), alpha = 1) +
geom_vline(xintercept = dmy("02-01-2020"), linetype = "dashed", alpha = 0.8) +
scale_color_brewer(palette = "YlGnBu") +
#facet_wrap(~age_breaks, ncol = 1) +
#labs(x = "Date", y = "Monthly Incidents", color = "Complaint", title = "Call Volume for Top Provider Impressions: Charlottesville\n") +
theme_minimal() #+
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
#plot_theme